Load Packages

First I need to load up the packages I’ll need

library(sf)
## Linking to GEOS 3.4.2, GDAL 2.1.2, proj.4 4.9.1
library(ggplot2) #development version!
## devtools::install_github("tidyverse/ggplot2")
library(tidyverse)
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(readr)
library(cowplot)
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
## 
##     ggsave
library(sp)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(dplyr)
library(ggrepel)
library(plyr)
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following object is masked from 'package:purrr':
## 
##     compact

Import Postcode Data

Now I import my data. I filter for the Arran postcodes, (since Arran all begins ‘KA27’).

## Finding the Arran coordinates
arrancoordinates <- read.csv("alldata/ukpostcodes.csv") %>%
 filter(substr(postcode,1,4)=="KA27")

arransubsect <- read_sf("alldata/Scotland_pcs_2011") %>%
filter(substr(label,1,4)=="KA27")

#pcs <- read_sf("alldata/Scotland_pcs_2011")
#arransubsect <- filter(pcs,substr(label,1,4)=="KA27")

Import SIMD data

Now I load the SIMD data, containing the geometries (shapefiles) and SIMD data (percentiles, etc)

#Import SIMD data from http://www.gov.scot/Topics/Statistics/SIMD
#https://data.gov.uk/dataset/scottish-index-of-multiple-deprivation-simd-2012
#https://data.gov.uk/dataset/scottish-index-of-multiple-deprivation-simd-2012/resource/d6fa8924-83da-4e80-a560-4ef0477f230b
DZBoundaries2016 <- read_sf("./alldata/SG_SIMD_2016")
DZBoundaries2012 <- read_sf("./alldata/SG_SIMD_2012")
DZBoundaries2009 <- read_sf("./alldata/SG_SIMD_2009")
DZBoundaries2006 <- read_sf("./alldata/SG_SIMD_2006")
DZBoundaries2004 <- read_sf("./alldata/SG_SIMD_2004")

Select Arran-specific SIMD data

I have to choose the right columns manually in order to select the Arran data.

#Selecting Arran data from Scotland (2016)
#Find postcode look-up from below file for KA27 postcodes. Find unique DZ. Find row positions.
#SIMD2016 <-read.csv("./alldata/00505244.csv")
#Selecting ArranDZ2016
#Arrandz2016 <- c(4672,4666,4669,4671,4667,4668,4670)
#arran2016 <- DZBoundaries2016[Arrandz2016,]
arran2016 <- DZBoundaries2016[c(4672,4666,4669,4671,4667,4668,4670),]
#Reorder arran 2016
reorderedvector<- c("S01011174", "S01011171", "S01011177", "S01011176", "S01011175", "S01011173", "S01011172" )
arran2016 <- arran2016 %>%
  slice(match(reorderedvector, DataZone))

#Find postcode look-up, KA27 postcodes. Find unique DZ. Find row positions.
#Selecting ArranDZ2012
Arrandz2012 <- c(4409,4372,4353,4352,4351,4350,4349)

#2012
arran2012 <- DZBoundaries2012[Arrandz2012,]
#2009
arran2009 <- DZBoundaries2009[Arrandz2012,]
#2006
arran2006 <- DZBoundaries2006[Arrandz2012,]
#2004
arran2004 <- DZBoundaries2004[Arrandz2012,]

Combine the data I want into one dataframe

Now I want to plot all the data, first I combine it all into one table. First I subselect the data I want from the appropriate columns.

arran20162 <- arran2016 %>%
  select(DataZone, geometry, Percentile)  %>%
  mutate(year="2016")

arran20122 <- arran2012 %>%
  select(DataZone, geometry, Percentile) %>%
  mutate(year="2012")

arran20092 <- arran2009 %>%
  select(DataZone, geometry, Percentile) %>%
  mutate(year="2009")

arran20062 <- arran2006 %>%
  select(DataZone, geometry, Percentile) %>%
  mutate(year="2006")

arran20042 <- arran2004 %>%
  select(DataZone, geometry, Percentile) %>%
  mutate(year="2004")

#Now I add it together
arransimd <- rbind(arran20162,arran20122,arran20092,arran20062,arran20042) %>%
mutate(
    lon = map_dbl(geometry, ~st_centroid(.x)[[1]]),
    lat = map_dbl(geometry, ~st_centroid(.x)[[2]])
    )

Grouping the pre-2016/post-2016 DataZones

arransimd$listID <- revalue(arransimd$DataZone,
               c("S01004409"="S01004409/S01011174", "S01004372"="S01004372/S01011171", "S01004353"="S01004353/S01011177", "S01004352"="S01004352/S01011176", "S01004351"="S01004351/S01011175", "S01004350"="S01004350/S01011173", "S01004349"="S01004349/S01011172", "S01011174"="S01004409/S01011174", "S01011171"="S01004372/S01011171", "S01011177"="S01004353/S01011177", "S01011176"="S01004352/S01011176", "S01011175"="S01004351/S01011175", "S01011173"="S01004350/S01011173", "S01011172"="S01004349/S01011172"))

The reason I’ve downloaded all the DataZones shapefiles individually is because they change between 2016 and 2012. See the small differences.

rbind(arran20162,arran20122) %>%
  ggplot() +
  geom_sf(aes(fill = DataZone)) +
  facet_wrap('year') +
  theme_grey() +
  theme(legend.position="none") +
  theme(axis.text.x=element_text(angle=45, hjust = 1))

Arran SIMD Health Percentiles

Now I want to look at percentile data.

arransimd %>%
filter(listID == unique(arransimd$listID))  %>%
  ggplot() +
  geom_sf(aes(fill = Percentile)) +
  facet_wrap('year', nrow=1) +
  theme_grey() +
  geom_text(aes(label = Percentile, x = lon, y = lat), size = 2, colour = "white") +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank()) +
  theme(legend.position="bottom")  

There we are. The SIMD health percentiles of Arran zones throughout SIMD history. And I’ve learned a little bit about graphics in R.

See this image at a larger size.

Now I want to overlay the postcodes by Datazone. To do this I’ve converted both the Arran coordinates and Arran (2016) shapefiles into spatial points/polygons, converted them into a common CRS, and then compared them by using ‘plyr::over()’. This gives me the object ‘namingdzpostcode’, with the postcode rows grouped into IDs (unidentified datazones).

simple.sf <- st_as_sf(arrancoordinates, coords=c('longitude','latitude'))
st_crs(simple.sf) <- 4326

exampleshapes <- sf:::as_Spatial(arran2016$geometry)
examplepoints <- sf:::as_Spatial(simple.sf$geom)

examplepoints <- spTransform(examplepoints, CRS("+proj=longlat +datum=WGS84"))
exampleshapes <- spTransform(exampleshapes, CRS("+proj=longlat +datum=WGS84"))

namingdzpostcode <- over(exampleshapes, examplepoints, returnList = TRUE)

I can then take a member reference from the orginal postcode list, which gives me a selection of the rows in that DZ. For simplicity I’ve written this as a new function.

Unfortunately, I haven’t worked out how to coordinate the new ID with the original DZ names yet, so I have to select by using the appropriate ID for each DZ. (I plotted each over a map to see where it fit.)

listID <- list(1,2,3,4,5,6,7)

Mutate arrancoordinates to label the IDs

function100 <- function(argument) 
{
  argument <- arrancoordinates[namingdzpostcode[[argument]],] %>% mutate(DataZone=argument)
}

newarrancoordinates <- lapply(1:7,function100)
newarrancoordinates <- rbind(newarrancoordinates[[1]], newarrancoordinates[[2]], newarrancoordinates[[3]], newarrancoordinates[[4]], newarrancoordinates[[5]], newarrancoordinates[[6]], newarrancoordinates[[7]])

newarrancoordinates$listID <- revalue(as.character(newarrancoordinates$DataZone),
               c('1'="S01004409/S01011174", '2'="S01004372/S01011171", '3'="S01004353/S01011177", '4'="S01004352/S01011176", '5'="S01004351/S01011175", '6'="S01004350/S01011173", '7'="S01004349/S01011172"))

Individual Datazones

filter(arransimd, year == 2016) %>%
  ggplot() +
  theme_grey() +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank()) +
  theme(legend.position="none") +
  facet_wrap('listID', nrow = 1) +
  geom_sf(aes(fill = DataZone))

Plotting DataZones by facet_grid

arransimd %>%
ggplot() +
  geom_sf(aes(fill = Percentile)) +
  facet_grid(listID ~ year) +
  theme_grey() +
  geom_text(aes(label = Percentile, x = lon, y = lat), size = 2, colour = "white") +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank()) +
  theme(legend.position="bottom")

See this plot at a proper scale.

Postcodes overlayed onto datazone.

ggplot() +
  geom_sf(data=arransimd) +
  geom_point(data=newarrancoordinates, 
             mapping = aes(x = longitude, y = latitude), 
             size=1) +
  theme_grey() +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank()) +
  coord_sf(crs= 4326, datum = sf::st_crs(4326)) +
  facet_wrap('listID', nrow = 1)

thisfunction2 <- function(argument) 
{arransimd %>%
  filter(listID == argument)  %>%
  ggplot() +
  geom_sf() +
  theme_grey() +
  geom_point(data = filter(newarrancoordinates, listID == argument), 
             mapping = aes(x = longitude, y = latitude), size=1) +
  geom_text_repel(data = filter(newarrancoordinates, listID == argument), 
            aes(label = filter(newarrancoordinates, 
                               listID == argument)$postcode, 
                x = longitude, y = latitude), size=2) +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank()) +
  coord_sf(crs= 4326, datum = sf::st_crs(4326))
}

output2 <- lapply(unique(arransimd$listID), thisfunction2)
grid.arrange(output2[[1]], output2[[2]], output2[[3]], output2[[4]], output2[[5]], output2[[6]], output2[[7]], ncol = 7)

Plots

function10 <- function(argument)
{
a <- arransimd %>%
mutate(
    lon = map_dbl(geometry, ~st_centroid(.x)[[1]]),
    lat = map_dbl(geometry, ~st_centroid(.x)[[2]])
    ) %>%
filter(listID == argument)  %>%
  ggplot() +
  geom_sf(aes(fill = Percentile)) +
  facet_wrap('year') +
  theme_grey() +
  geom_text(aes(label = Percentile, x = lon, y = lat), size = 2, colour = "white") +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank()) +
  theme(legend.position="bottom")  

b <- arransimd %>%
  filter(listID == argument)  %>%
  ggplot() +
  geom_sf(data = arransubsect) +
  theme_grey() +
  theme(axis.text.x=element_text(angle=45, hjust = 1)) +
  theme(legend.position="bottom") +
  geom_sf(aes(fill = DataZone))

c <- arransimd %>%
  filter(listID == argument)  %>%
  ggplot() +
  geom_sf() +
  theme_grey() +
  geom_point(data = filter(newarrancoordinates, listID == argument), 
             mapping = aes(x = longitude, y = latitude), size=1) +
  geom_text_repel(data = filter(newarrancoordinates, listID == argument), 
            aes(label = filter(newarrancoordinates, 
                               listID == argument)$postcode, 
                x = longitude, y = latitude), size=2) +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank()) +
  coord_sf(crs= 4326, datum = sf::st_crs(4326))

grid.arrange(a, b, c, nrow = 1)
}
lapply(unique(arransimd$listID), function10)

## [[1]]
## TableGrob (1 x 3) "arrange": 3 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (1-1,3-3) arrange gtable[layout]
## 
## [[2]]
## TableGrob (1 x 3) "arrange": 3 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (1-1,3-3) arrange gtable[layout]
## 
## [[3]]
## TableGrob (1 x 3) "arrange": 3 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (1-1,3-3) arrange gtable[layout]
## 
## [[4]]
## TableGrob (1 x 3) "arrange": 3 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (1-1,3-3) arrange gtable[layout]
## 
## [[5]]
## TableGrob (1 x 3) "arrange": 3 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (1-1,3-3) arrange gtable[layout]
## 
## [[6]]
## TableGrob (1 x 3) "arrange": 3 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (1-1,3-3) arrange gtable[layout]
## 
## [[7]]
## TableGrob (1 x 3) "arrange": 3 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (1-1,3-3) arrange gtable[layout]

See these plots at a larger scale.

a2 <- arransimd %>%
  ggplot() +
  geom_sf(aes(fill = Percentile)) +
  facet_grid(listID ~ year) +
  theme_grey() +
  geom_text(aes(label = Percentile, x = lon, y = lat), size = 2, colour = "white") +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank()) +
  theme(legend.position="bottom")  

b2 <- arransimd %>%
  filter(year == 2016)  %>%
  ggplot() +
  facet_wrap("listID") +
  geom_sf(data = arransubsect) +
  theme_grey() +
  theme(axis.text.x=element_text(angle=45, hjust = 1)) +
  theme(legend.position="none") +
  geom_sf(aes(fill = DataZone))

c2 <- arransimd %>%
 filter(year == 2016)  %>%
  ggplot() +
  facet_wrap("listID") +
  geom_sf() +
  theme_grey() +
  geom_point(data = newarrancoordinates, 
             mapping = aes(x = longitude, y = latitude), size=1) +
 geom_text_repel(data = newarrancoordinates, aes(label = newarrancoordinates$postcode, 
                x = longitude, y = latitude), size=2) +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank()) +
  coord_sf(crs= 4326, datum = sf::st_crs(4326))

grid.arrange(a2, b2, c2, ncol = 3)

See this plot at a proper scale.

See this data as an interactive map.